Here is the report of the network analysis of the crop health data.

require(dplyr)
library(plyr)
require(reshape)
require(reshape2)
library(gridExtra)
library(lubridate)
library(doBy)
library(cluster)
library(ggplot2)
library(scales)
library(bioDist)
library(vegan)
library(mvtnorm)
library(igraph)
library(WGCNA)
## ==========================================================================
## *
## *  Package WGCNA 1.47 loaded.
## *
## *    Important note: It appears that your system supports multi-threading,
## *    but it is not enabled within WGCNA in R. 
## *    To allow multi-threading within WGCNA with all available cores, use 
## *
## *          allowWGCNAThreads()
## *
## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
## *    Alternatively, set the following environment variable on your system:
## *
## *          ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## *    for example 
## *
## *          ALLOW_WGCNA_THREADS=4
## *
## *    To set the environment variable in linux bash shell, type 
## *
## *           export ALLOW_WGCNA_THREADS=4
## *
## *     before running R. Other operating systems or shells will
## *     have a similar command to achieve the same aim.
## *
## ==========================================================================

Load the survey data

library(RCurl)
file <- getURL("https://docs.google.com/spreadsheets/d/1zB7gNdI7Nk7SuHuPWcjzaKnjuwkvL6sOVMo0zMfuV-c/pub?gid=558862364&single=true&output=csv")
data <- read.csv(text = file)
# save(data, file = "manuscript1/data/skep1data.RData")
#load(file = "manuscript1/data/skep1data.RData")
#Filepath <- "~/Google Drive/1.SKEP1/SKEP1survey.xls"
#data <- readWorksheetFromFile(Filepath, sheet = 1)
data[data == "-"] <- NA # replace '-' with NA
data[data == ""] <- NA # replace 'missing data' with NA

#==== to lower variable names ====
names(data) <- tolower(names(data)) # for more consistancy

select out the column which is not inlcuded in the analysis

data$phase <- NULL # there is only one type yype of phase in the survey
data$identifier <- NULL # this variable is not included in the analysis
data$village <- NULL
data$year <- NULL
data$season <- NULL
data$lat <- NULL
data$long <- NULL
data$fa <- NULL # field area is not include in the analysis
data$fn <- NULL # farmer name can not be included in this survey analysis
data$fp <- NULL # I do not know what is fp
data$lfm <- NULL # there is only one type of land form in this survey
data$ced <- NULL # Date data can not be included in the network analysis
data$cedjul <- NULL
data$hd <- NULL # Date data can not be included in the network analysis
data$hdjul <- NULL
data$cvr <- NULL
data$varcoded <- NULL # I will recode them 
data$fymcoded <- NULL
data$mu <- NULL # no record
data$nplsqm <- NULL
data$rbpx <- NULL # no record
#==== corract the variable type =====
data <- transform(data, 
                  country = as.factor(country),
                  pc = as.factor(pc),
                  cem = as.factor(cem),     
                  ast = as.factor(ast),       
                  ccd = as.numeric(ccd),
                  vartype = as.factor(vartype),
                  fym = as.character(fym),
                  n = as.numeric(n),
                  p = as.numeric(p) ,
                  k = as.numeric(k),
                  mf = as.numeric(mf),        
                  wcp = as.factor(wcp),      
                  iu = as.numeric(iu),     
                  hu = as.numeric(hu),      
                  fu = as.numeric(fu),      
                  cs  = as.factor(cs),      
                  ldg  =  as.numeric(ldg),  
                  yield = as.numeric(yield) ,
                  dscum = as.factor(dscum),   
                  wecum = as.factor(wecum),   
                  ntmax = as.numeric(ntmax), 
                  npmax = as.numeric(npmax),    
                  nltmax = as.numeric(nltmax),  
                  nlhmax = as.numeric(nltmax),  
                  waa = as.numeric(waa),      
                  wba = as.numeric(wba) ,   
                  dhx =  as.numeric(dhx),  
                  whx =  as.numeric(whx),     
                  ssx  = as.numeric(ssx),  
                  wma = as.numeric(wma), 
                  lfa = as.numeric(lfa),
                  lma = as.numeric(lma),   
                  rha  = as.numeric(rha) ,
                  thrx = as.numeric(thrx),    
                  pmx = as.numeric(pmx),    
                  defa  = as.numeric(defa) ,
                  bphx = as.numeric(bphx),   
                  wbpx = as.numeric(wbpx),    
                  awx  = as.numeric(awx), 
                  rbx =as.numeric(rbx),   
                  rbbx = as.numeric(rbbx),  
                  glhx  = as.numeric(glhx), 
                  stbx=as.numeric(stbx),    
                  hbx= as.numeric(hbx),
                  bbx = as.numeric(bbx),    
                  blba = as.numeric(blba),    
                  lba = as.numeric(lba),    
                  bsa = as.numeric(bsa),    
                  blsa = as.numeric(blsa),  
                  nbsa = as.numeric(nbsa),  
                  rsa  = as.numeric(rsa),   
                  lsa = as.numeric(lsa),    
                  shbx = as.numeric(shbx) ,  
                  shrx = as.numeric(shrx),    
                  srx= as.numeric(srx),    
                  fsmx = as.numeric(fsmx),   
                  nbx =  as.numeric(nbx),   
                  dpx = as.numeric(dpx),    
                  rtdx  = as.numeric(rtdx),  
                  rsdx  = as.numeric(rsdx),
                  gsdx  =as.numeric(gsdx),   
                  rtx = as.numeric(rtx)
) 
data$pc <- ifelse(data$pc == "rice", 1, 0)

#Crop establisment method
levels(data$cem)[levels(data$cem) == "trp"] <- 1
levels(data$cem)[levels(data$cem) == "TPR"] <- 1
levels(data$cem)[levels(data$cem) == "DSR"] <- 2
levels(data$cem)[levels(data$cem) == "dsr"] <- 2

# fym there are two type 0 and 1, raw data are recorded as no, yes, and value, if the value is 0 which mean 0 and if the value more than 0 which means 1 

data$fym <- ifelse(data$fym == "no", 0, 
                   ifelse(data$fym == "0", 0, 1
                   )
)

# vartype there are three type treditional varieties, modern varities and hybrid
data$vartype <- ifelse(data$vartype == "tv", 1,
                       ifelse(data$vartype == "mv", 2,
                              ifelse(data$vartype == "hyb", 3, NA
                              )
                       )
)


# wcp weed control management
levels(data$wcp)[levels(data$wcp) == "hand"] <- 1
levels(data$wcp)[levels(data$wcp) == "herb"] <- 2
levels(data$wcp)[levels(data$wcp) == "herb-hand"] <- 3


# Crop Status
levels(data$cs)[levels(data$cs) == "very poor"] <- 1
levels(data$cs)[levels(data$cs) == "poor"] <- 2
levels(data$cs)[levels(data$cs) == "average"] <- 3
levels(data$cs)[levels(data$cs) == "good"] <- 4
levels(data$cs)[levels(data$cs) == "very good"] <- 5
#clean the data
num.data <- apply(data[, -c(1,2)], 2, as.numeric)
num.data <- as.data.frame(as.matrix(num.data))
data <- cbind(data[1:2], num.data)
data <- data[,apply(data[, -c(1,2)], 2, var, na.rm = TRUE) != 0] # exclude the column with variation = 0
data <- data[complete.cases(data),] # exclude row which cantain NA
#==== cluster analysis of the production sitatuon of the survey data ====
start.PS <- "pc"
end.PS <- "fu"
start.col.PS <- match(start.PS, names(data))
end.col.PS <- match(end.PS, names(data))

PS.data <- data[, c(1, start.col.PS:end.col.PS)]

# transform all variable to numeric type

# wss <- (nrow(PS.data)-1)* sum(apply(PS.data, 2, var))
# for (i in 2:15) wss[i] <- sum(kmeans(PS.data, 
#                                      centers=i)$withinss)
# plot(1:15, wss, type="b", xlab="Number of Clusters",
#      ylab="Within groups sum of squares")


#distance matrix
dist.PS <- daisy(PS.data[-1])
## Warning in daisy(PS.data[-1]): binary variable(s) 1, 2, 6 treated as
## interval scaled
cluster.PS <- hclust(dist.PS, method = "average")

dendro.PS <- as.dendrogram(cluster.PS)
plot(dendro.PS, center = T, nodePar = list(lab.cex = 0.6,
                                          lab.col = "black", pch = NA),
     main = "Dendrogram for Production situation")

# draw retangles
rect.hclust(tree = cluster.PS, k=2, border = c("red", "blue"))

#number of members in each cluster
PS.no <- cutree(cluster.PS, k = 2)

# cophenitic correlation
rcluster.PS <- cophenetic(cluster.PS)
#cor(dist.PS, rcluster.PS)

data <- cbind(data, PS.no)
data$PS.no <- as.factor(data$PS.no)
name.country <- levels(data$country)
by.country.data <- list()
for(i in 1:length(name.country)){
  temp <- data %>% 
    filter(country == name.country[i])
  by.country.data[[i]] <- temp
}
g <- ggplot(data, aes(x = country)) +
    geom_histogram(weights = count) +
    stat_bin(geom = "text", aes(label = ..count..), vjust = 1.5, color = "white" ) +
  scale_y_continuous(limits = c(0, 125), breaks = NULL ) +
  theme(legend.position = "none") +
  ggtitle("The no of observation of dataset in each country")
print(g)
## ymax not defined: adjusting position using y instead

par(mfrow=c(1, 2), las = 1)
for(i in 1:length(name.country)) {
g <- ggplot(by.country.data[[i]], aes(x = PS.no)) +
    geom_histogram(weights = count) +
    ggtitle(paste("Histogram PS cluster of", name.country[i])) +
  theme(legend.position="none")
print(g)   
}

# The profile of PS no 
clus.PS.data <- data[, -c(1, 17:56)]
m.PS.data <- melt(clus.PS.data)
## Using country, PS.no as id variables
name.variable <- levels(m.PS.data$variable)
name.PS.no <- levels(m.PS.data$PS.no)
# check the profile of each cluster by histogram 

for(i in 1:length(name.variable)) {
  subtemp <- subset(m.PS.data, variable == name.variable[i])
  for(j in 1: length(name.PS.no)){
    subtemp1 <- subtemp %>% filter(PS.no == name.PS.no[j])
   g <-  ggplot(subtemp1, aes(x = value)) + 
   geom_bar(aes(y = (..count..)/sum(..count..)), binwidth = 1) + 
  scale_y_continuous(limits = c(0, 1), labels = percent) +
ylab("Percent") + xlab(paste(name.variable[i]))
  ggtitle(paste("Histogram of", name.variable[i], "in PS. no", name.PS.no[j]))
print(g) 
  }
}

Network analysis

#head(data)
# select only the variables related to the injury profiles

start.IP <- "dhx"
end.IP <- "rtx"
start.col.IP <- match(start.IP, names(data))
end.col.IP <- match(end.IP, names(data))

IP.data <- data[start.col.IP:end.col.IP]

IP.data <- IP.data[ ,apply(IP.data, 2, var, na.rm = TRUE) != 0] # exclude the column with variation = 0

groups <- paste(data$country, data$PS.no, sep = "_")

IP.data <- cbind(groups, IP.data)

IP.data[is.na(IP.data)] <- 0

trts <- as.vector(unique(IP.data$groups))
#=====co_occurrence_pairwise.R====
results <- matrix(nrow = 0, ncol = 7)
options(warnings = -1)
for(a in 1:length(trts)){
  #pull the first element from the vector of treatments
  trt.temp <- trts[a]
  #subset the dataset for those treatments
  temp <- subset(IP.data, groups == trt.temp)
  
  #in this case the community data started at column 6, so the loop for co-occurrence has to start at that point
  for(b in 2:(dim(temp)[2]-1)){
    #every species will be compared to every other species, so there has to be another loop that iterates down the rest of the columns
    for(c in (b+1):(dim(temp)[2])){
      
      #summing the abundances of species of the columns that will be compared
      species1.ab <- sum(temp[,b])
      species2.ab <- sum(temp[,c])
      #if the column is all 0's no co-occurrence will be performed
      if(species1.ab >1 & species2.ab >1){
        test <- cor.test(temp[,b], temp[,c], method = "spearman", na.action = na.rm, exact = FALSE)
        # There are warnings when setting exact = TRUE because of ties from the output of Spearman's correlation
        # stackoverflow.com/questions/10711395/spear-man-correlation and ties
        # It would be still valid if the data is not normally distributed.
        rho <- test$estimate
        p.value <- test$p.value
      }
      
      if(species1.ab <=1 | species2.ab <= 1){
        rho<-0
        p.value<-1
      } 
      
      new.row <- c(trts[a], names(temp)[b], names(temp)[c], rho, p.value, species1.ab, species2.ab)
      results <- rbind(results, new.row)            
      
    }
    
  }
  print(a/length(trts))
  
}
## [1] 0.1111111
## [1] 0.2222222
## [1] 0.3333333
## [1] 0.4444444
## [1] 0.5555556
## [1] 0.6666667
## [1] 0.7777778
## [1] 0.8888889
## [1] 1
results<-data.frame(data.matrix(results))
names(results)<-c("trt","taxa1","taxa2","rho","p.value","ab1","ab2")

#making sure certain variables are factors
results$trt <- as.factor(results$trt)
results$taxa1 <- as.character(as.factor(results$taxa1))
results$taxa2 <- as.character(as.factor(results$taxa2))
results$rho <- as.numeric(as.character(results$rho))
results$p.value <- as.numeric(as.character(results$p.value))
results$ab1 <- as.numeric(as.character(results$ab1))
results$ab2 <- as.numeric(as.character(results$ab2))

str(results)
## 'data.frame':    2484 obs. of  7 variables:
##  $ trt    : Factor w/ 9 levels "IDN_1","IDN_2",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ taxa1  : chr  "dhx" "dhx" "dhx" "dhx" ...
##  $ taxa2  : chr  "whx" "ssx" "defa" "bphx" ...
##  $ rho    : num  0.0652 -0.1227 0.1899 -0.117 -0.5074 ...
##  $ p.value: num  0.689188 0.45061 0.240542 0.472163 0.000832 ...
##  $ ab1    : num  1307 1307 1307 1307 1307 ...
##  $ ab2    : num  2089 109 1941 1286 84 ...
head(results)
##     trt taxa1 taxa2         rho      p.value  ab1  ab2
## 1 PHL_1   dhx   whx  0.06524012 0.6891879742 1307 2089
## 2 PHL_1   dhx   ssx -0.12272044 0.4506097948 1307  109
## 3 PHL_1   dhx  defa  0.18989475 0.2405423584 1307 1941
## 4 PHL_1   dhx  bphx -0.11699678 0.4721633481 1307 1286
## 5 PHL_1   dhx  wbpx -0.50743562 0.0008316712 1307   84
## 6 PHL_1   dhx   awx  0.00000000 1.0000000000 1307    1
results_sub <- subset(results, as.numeric(as.character(abs(rho))) > 0.25)

results_sub.by.group <- list()
name.groups <- sort(as.vector(unique(groups)))
for(i in 1: length(name.groups)){
  
  results_sub.by.group[[i]] <- subset(results_sub, trt == name.groups[i])
}

# head(results_sub.by.group[[1]][,2:3]) # get the list

g  <- list()
for(i in 1:length(name.groups)){
  g[[i]] <- graph.edgelist(as.matrix(results_sub.by.group[[i]][,2:3]), directed = FALSE)
#== adjust layout
l <- layout.fruchterman.reingold(g[[i]])
#== adjust vertices

V(g[[i]])$color <- "tomato"
V(g[[i]])$frame.color <- "gray40"
V(g[[i]])$shape <- "circle"
V(g[[i]])$size <- 25
V(g[[i]])$label.color <- "white"
V(g[[i]])$label.font <- 2
V(g[[i]])$label.family <- "Helvetica"
V(g[[i]])$label.cex <- 0.7

#== adjust the edge
E(g[[i]])$weight <- as.matrix(results_sub.by.group[[i]][, 4])
E(g[[i]])$width <- 1 + E(g[[i]])$weight*5

col <- c("firebrick3", "forestgreen")
colc <- cut(results_sub.by.group[[i]]$rho, breaks = c(-1, 0, 1), include.lowest = TRUE)
E(g[[i]])$color <- col[colc]

#== plot network model
plot(g[[i]], layout = l * 1.0, main = paste( "network model of", name.groups[i]))
}

network.value <- list()

for(i in 1:length(g)){

  E(g[[i]])$weight <- as.matrix(abs(results_sub.by.group[[i]][, 4]))

  network.value[[i]] <- data.frame(
  id =V(g[[i]])$name, # name of variable
  deg = degree(g[[i]]), # degree
  bet = betweenness(g[[i]]), # betweenness
  clo = closeness(g[[i]]), # closeness
  eig = evcent(g[[i]])$vector,# egin.cent
  cor = graph.coreness(g[[i]]), # coreness
  tra = transitivity(g[[i]], type = c("local")) # cluster coefficients
)
network.value[[i]]$res <- as.vector(lm(eig ~ bet, data = network.value[[i]])$residuals)
}

One method is to plot / regress eigenvector centrality on betweenness and examine the residue.

for(i in 1:length(network.value)){
 plot <- ggplot(
  network.value[[i]], aes(x = bet, y = eig,
               label = rownames(network.value[[i]]),
               color = res,
               size = abs(res))) +
    xlab("Betweenness Centrality") +
    ylab("Eigencvector Centrality") +
    geom_text() +
    ggtitle(paste("Key Actor Analysis for Injuiry Profiles of", name.groups[i]))

print(plot) 
}

# dd <- degree.distribution(g[[1]], cumulative = T, mode = "all")
# plot(dd, pch = 19, cex = 1, col = "orange", xlab = "Degree", ylab = "Cumulative Frequesncy")
# distances(g[[1]])
# #shortest_paths(g[[1]], 5)
# all_shortest_paths(g[[1]], 1, 6:8)
# mean_distance(g[[1]])
# hist(degree(g[[1]]),  col = "lightblue", xlim = c(0, 10), xlab = "Vertex Degree", ylab = "Frequency", main = "")
# hist(graph.strength(g[[1]]),  col = "pink", xlim = c(0, 5), xlab = "Vertex strength", ylab = "Frequency", main = "")

network.prop <- list()
mat <- NULL
i <- 1
for(i in 1:length(g)){
new.mat <- as.matrix(get.adjacency(g[[i]], attr = "weight"))
new.mat <- as.data.frame(networkConcepts(new.mat)$Summary)[1]
names(new.mat) <- name.groups[[i]]
mat <- c(mat, new.mat)
#net.result <- fundamentalNetworkConcepts(mat[[i]])
#conformityBasedNetworkConcepts(mat)
}
sum.net <- as.data.frame(do.call("cbind", mat))
rownames(sum.net) <- c("Density", "Centralization", "Heterogeneity", "Mean ClusterCoef", "Mean Connectivity")